# -*- coding: utf-8 -*-
import csv
import scrapy
from lxml import html as htm

# User-Agent header value sent with every request issued by the spider.
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 6.1) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/41.0.2228.0 Safari/537.36"
)

class MovieSpider(scrapy.Spider):
    """Scrape the Douban Top 250 list pages and append one CSV row per movie.

    ``start_requests`` makes sure the CSV file starts with a header row and
    schedules the ten list pages; ``parse`` extracts the rows of one page and
    appends them to the same file.
    """

    name = 'movie'

    # Output file, resolved against the directory the crawl is started from.
    csv_path = 'data.csv'
    # Column order of every row written to ``csv_path``.
    csv_header = ('name', 'ename', 'actor', 'year', 'country', 'category', 'score', 'review')

    def start_requests(self):
        """Write the CSV header if the file is empty, then yield the page requests.

        Yields:
            One ``scrapy.Request`` per list page, handled by ``parse``.
        """
        # The file is opened in append mode so rows from earlier runs are kept.
        # The header is written only when the file is still empty; writing it
        # unconditionally would insert a header row in the middle of the data
        # on every re-run.
        with open(self.csv_path, 'a+', encoding='utf-8', newline="") as csv_file:
            csv_file.seek(0, 2)  # whence=2: move to the end of the file
            if csv_file.tell() == 0:
                csv.writer(csv_file).writerow(self.csv_header)

        # Pages are addressed by the offset of their first entry: 0, 25, ..., 225.
        for offset in range(0, 250, 25):
            url = 'https://movie.douban.com/top250?start={}&filter='.format(offset)
            yield scrapy.Request(url=url, headers={"User-Agent": USER_AGENT}, callback=self.parse)

    def parse(self, response):
        """Extract every movie entry of one list page and append the rows to the CSV.

        An entry that lacks one of the expected nodes is skipped with a
        warning, so a single malformed entry no longer discards the rest of
        the page.

        Args:
            response: the downloaded list page.
        """
        tree = htm.fromstring(response.body)

        rows = []
        # Each <li> under the "grid_view" list holds the data of one movie.
        for entry in tree.xpath('//ol[@class="grid_view"]/li'):
            try:
                row = self._extract_row(entry)
            except IndexError:
                # Raised when an XPath matches nothing or the description has
                # fewer than three "/"-separated fields.
                self.logger.warning("Skipping malformed movie entry on %s", response.url)
                continue
            print(row)
            rows.append(row)

        with open(self.csv_path, 'a+', encoding='utf-8', newline="") as csv_file:
            csv.writer(csv_file).writerows(rows)

    @staticmethod
    def _extract_row(entry):
        """Return the CSV row (in ``csv_header`` order) for one movie ``<li>`` element.

        Raises:
            IndexError: if an expected node or description field is missing.
        """
        hd = './div/div[@class="info"]/div[@class="hd"]'
        bd = './div/div[@class="info"]/div[@class="bd"]'

        name = entry.xpath(hd + '/a/span[1]/text()')[0].replace(" ", "")
        ename = (
            entry.xpath(hd + '/a/span[2]/text()')[0]
            .strip()
            .replace("\xa0", "")
            .replace("\n", "")
            .replace("/", "")
        )

        # The first text node of the paragraph is used as the actor column and
        # the second as the "year / country / category" description; query the
        # text nodes once and index into the result.
        paragraph = entry.xpath(bd + '/p/text()')
        actor = paragraph[0].strip().replace("\xa0", "").replace("\n", "").split("<br>")[0]
        desc = paragraph[1].strip().replace("\xa0", "").replace("\n", "")

        fields = desc.split("/")
        year = fields[0]
        country = fields[1]
        category = fields[2]

        score = entry.xpath(bd + '/div[@class="star"]/span[2]/text()')[0]
        # Strip the "人评价" suffix so only the vote count text remains.
        review = entry.xpath(bd + '/div[@class="star"]/span[4]/text()')[0].replace("人评价", "")

        return [name, ename, actor, year, country, category, score, review]
